{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "clip-onnx-benchmark-gpu.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "## Restart the Colab session after installation\n",
        "Reload the session if something doesn't work"
      ],
      "metadata": {
        "id": "fxPg_VvZuScV"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "al_QNjyFq6Jj"
      },
      "outputs": [],
      "source": [
        "%%capture\n",
        "!pip install git+https://github.com/Lednik7/CLIP-ONNX.git\n",
        "!pip install git+https://github.com/openai/CLIP.git\n",
        "!pip install onnxruntime-gpu"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%%capture\n",
        "!wget -c -O CLIP.png https://github.com/openai/CLIP/blob/main/CLIP.png?raw=true"
      ],
      "metadata": {
        "id": "42eeJz9lTdJ6"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!nvidia-smi"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "XuauIZIBSEUX",
        "outputId": "7e2b352b-751e-439e-bb3d-4e1323e2e44d"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Thu Jan  6 15:47:04 2022       \n",
            "+-----------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |\n",
            "|-------------------------------+----------------------+----------------------+\n",
            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                               |                      |               MIG M. |\n",
            "|===============================+======================+======================|\n",
            "|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   34C    P8    28W / 149W |      0MiB / 11441MiB |      0%      Default |\n",
            "|                               |                      |                  N/A |\n",
            "+-------------------------------+----------------------+----------------------+\n",
            "                                                                               \n",
            "+-----------------------------------------------------------------------------+\n",
            "| Processes:                                                                  |\n",
            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
            "|        ID   ID                                                   Usage      |\n",
            "|=============================================================================|\n",
            "|  No running processes found                                                 |\n",
            "+-----------------------------------------------------------------------------+\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import onnxruntime\n",
        "print(onnxruntime.get_device())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gqvxpdajRX5_",
        "outputId": "7c44b4e1-d916-42d9-cc61-52efdf0fa9a9"
      },
      "execution_count": 20,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "GPU\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## GPU inference mode\n",
        "Select a runtime GPU to continue:\n",
        "\n",
        "Click Runtime -> Change runtime type -> set \"Hardware accelerator\" to GPU and save. Colab will then try to connect you to a GPU runtime."
      ],
      "metadata": {
        "id": "010k-ksVTjAu"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Torch CLIP"
      ],
      "metadata": {
        "id": "KdTz0IJWVBqE"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import clip\n",
        "from PIL import Image\n",
        "import numpy as np\n",
        "\n",
        "# load the model on CPU: the ONNX conversion below is run on this CPU copy\n",
        "model, preprocess = clip.load(\"ViT-B/32\", device=\"cpu\", jit=False)\n",
        "\n",
        "# batch first\n",
        "image = preprocess(Image.open(\"CLIP.png\")).unsqueeze(0) # [1, 3, 224, 224]\n",
        "image_onnx = image.detach().cpu().numpy().astype(np.float32)\n",
        "\n",
        "# batch first\n",
        "text = clip.tokenize([\"a diagram\", \"a dog\", \"a cat\"]) # [3, 77]\n",
        "text_onnx = text.detach().cpu().numpy().astype(np.int64)"
      ],
      "metadata": {
        "id": "9ROPwKYurOhP"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### CLIP-ONNX"
      ],
      "metadata": {
        "id": "Ao2MriaVVG6Y"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from clip_onnx import clip_onnx, attention\n",
        "clip.model.ResidualAttentionBlock.attention = attention\n",
        "\n",
        "onnx_model = clip_onnx(model)\n",
        "onnx_model.convert2onnx(image, text, verbose=False)\n",
        "# ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']\n",
        "onnx_model.start_sessions(providers=[\"CPUExecutionProvider\"]) # CPU mode"
      ],
      "metadata": {
        "id": "nSeG9uAZrcph",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "25e07d68-6ef2-44c4-d144-c43b611f3316"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/clip_onnx/utils.py:40: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n",
            "  head_dim = q.shape[2] // num_heads\n",
            "/usr/local/lib/python3.7/dist-packages/torch/onnx/symbolic_helper.py:716: UserWarning: allowzero=0 by default. In order to honor zero value in shape use allowzero=1\n",
            "  warnings.warn(\"allowzero=0 by default. In order to honor zero value in shape use allowzero=1\")\n",
            "/usr/local/lib/python3.7/dist-packages/torch/onnx/symbolic_opset9.py:2819: UserWarning: Exporting aten::index operator of advanced indexing in opset 14 is achieved by combination of multiple ONNX operators, including Reshape, Transpose, Concat, and Gather. If indices include negative values, the exported graph will produce incorrect results.\n",
            "  \"If indices include negative values, the exported graph will produce incorrect results.\")\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from clip_onnx import clip_onnx, attention\n",
        "clip.model.ResidualAttentionBlock.attention = attention"
      ],
      "metadata": {
        "id": "imMVbHFO-KSH"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "onnx_model = clip_onnx(model)\n",
        "onnx_model.load_onnx(\"/content/clip_visual.onnx\",\n",
        "                     \"/content/clip_textual.onnx\",\n",
        "                     model.logit_scale.exp())\n",
        "onnx_model.start_sessions(providers=[\"CUDAExecutionProvider\"]) # GPU mode"
      ],
      "metadata": {
        "id": "PsDS7ty79zZf"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "onnx_model.visual_session.get_providers()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "aZsGJNrbNCYe",
        "outputId": "9dcdd2d6-2a73-4dad-9ea7-c2892273c631"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['CUDAExecutionProvider', 'CPUExecutionProvider']"
            ]
          },
          "metadata": {},
          "execution_count": 4
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Benchmark"
      ],
      "metadata": {
        "id": "J5IcOG_6jAFz"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model, preprocess = clip.load(\"ViT-B/32\", device=\"cuda\", jit=False)"
      ],
      "metadata": {
        "id": "SJ_5_x7vLepK"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "model.eval()\n",
        "for x in model.parameters():\n",
        "    x.requires_grad = False"
      ],
      "metadata": {
        "id": "OnOzZ3LMuubW"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy, random, torch"
      ],
      "metadata": {
        "id": "wDwqRRrTGKUS"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def set_seed(seed=12):\n",
        "    \"\"\"Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.\n",
        "\n",
        "    Args:\n",
        "        seed: integer seed shared by all generators. Defaults to 12, the value\n",
        "            this notebook has always used, so plain `set_seed()` calls behave\n",
        "            as before.\n",
        "    \"\"\"\n",
        "    random.seed(seed)\n",
        "    np.random.seed(seed)\n",
        "    torch.manual_seed(seed)\n",
        "    # seed every visible GPU, not only the current device\n",
        "    torch.cuda.manual_seed_all(seed)\n",
        "\n",
        "    # ask cuDNN to use deterministic algorithms\n",
        "    torch.backends.cudnn.deterministic = True"
      ],
      "metadata": {
        "id": "9H17n_6gGJgT"
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "%timeit onnx_model.encode_image(image_onnx)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "IsJ2TsBRNh8f",
        "outputId": "bb642ee7-0112-4195-be35-14fdf719e7bc"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "The slowest run took 23.27 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
            "1 loop, best of 5: 20.1 ms per loop\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "import time\n",
        "\n",
        "n = 5  # timed repetitions per measurement\n",
        "clip_results = {\"encode_image\": [],\n",
        "                \"encode_text\": []}\n",
        "onnx_results = {\"encode_image\": [],\n",
        "                \"encode_text\": []}\n",
        "\n",
        "\n",
        "def mean_latency(fn, data, repeats):\n",
        "    \"\"\"Return the mean wall-clock time in seconds of `fn(data)` over `repeats` calls.\n",
        "\n",
        "    CUDA work is launched asynchronously, so the device is synchronized before\n",
        "    the clock starts and again before it stops; otherwise a measurement could\n",
        "    end while the GPU is still busy. The mean is rounded to 3 decimals.\n",
        "    \"\"\"\n",
        "    timings = []\n",
        "    for _ in range(repeats):\n",
        "        torch.cuda.synchronize()\n",
        "        start = time.perf_counter()\n",
        "        fn(data)\n",
        "        torch.cuda.synchronize()\n",
        "        timings.append(time.perf_counter() - start)\n",
        "    return round(sum(timings) / repeats, 3)\n",
        "\n",
        "\n",
        "def record(results, backend, batch, task, seconds):\n",
        "    \"\"\"Print one measurement, store it as `[batch, seconds]` under `results[task]`\n",
        "    and release PyTorch's cached GPU memory before the next measurement.\"\"\"\n",
        "    print(backend, batch, task, seconds)\n",
        "    torch.cuda.empty_cache()\n",
        "    results[task].append([batch, seconds])\n",
        "\n",
        "\n",
        "for batch in [2, 8, 16, 32, 64]:\n",
        "    set_seed()\n",
        "    # random inputs: GPU tensors for torch, NumPy copies of the same data for ONNX\n",
        "    image_input = torch.randint(1, 255, (batch, 3, 224, 224)).cuda()\n",
        "    text_input = torch.randint(320, 49407, (batch, 77)).cuda()\n",
        "    image_input_onnx = image_input.detach().cpu().numpy().astype(np.float32)\n",
        "    text_input_onnx = text_input.detach().cpu().numpy().astype(np.int64)\n",
        "\n",
        "    seconds = mean_latency(onnx_model.encode_image, image_input_onnx, n)\n",
        "    record(onnx_results, \"onnx\", batch, \"encode_image\", seconds)\n",
        "\n",
        "    with torch.inference_mode():\n",
        "        seconds = mean_latency(model.encode_image, image_input, n)\n",
        "    record(clip_results, \"torch\", batch, \"encode_image\", seconds)\n",
        "\n",
        "    seconds = mean_latency(onnx_model.encode_text, text_input_onnx, n)\n",
        "    record(onnx_results, \"onnx\", batch, \"encode_text\", seconds)\n",
        "\n",
        "    with torch.inference_mode():\n",
        "        seconds = mean_latency(model.encode_text, text_input, n)\n",
        "    record(clip_results, \"torch\", batch, \"encode_text\", seconds)\n",
        "\n",
        "    print(\"-\" * 78)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "4lFL6tzWjiWL",
        "outputId": "a209b78a-fe78-4b46-9220-4b9624a1568f"
      },
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "onnx 2 encode_image 0.073\n",
            "torch 2 encode_image 0.041\n",
            "onnx 2 encode_text 0.032\n",
            "torch 2 encode_text 0.033\n",
            "------------------------------------------------------------------------------\n",
            "onnx 8 encode_image 0.088\n",
            "torch 8 encode_image 0.128\n",
            "onnx 8 encode_text 0.052\n",
            "torch 8 encode_text 0.102\n",
            "------------------------------------------------------------------------------\n",
            "onnx 16 encode_image 0.123\n",
            "torch 16 encode_image 0.258\n",
            "onnx 16 encode_text 0.08\n",
            "torch 16 encode_text 0.201\n",
            "------------------------------------------------------------------------------\n",
            "onnx 32 encode_image 0.196\n",
            "torch 32 encode_image 0.505\n",
            "onnx 32 encode_text 0.138\n",
            "torch 32 encode_text 0.386\n",
            "------------------------------------------------------------------------------\n",
            "onnx 64 encode_image 0.352\n",
            "torch 64 encode_image 0.995\n",
            "onnx 64 encode_text 0.252\n",
            "torch 64 encode_text 0.754\n",
            "------------------------------------------------------------------------------\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd"
      ],
      "metadata": {
        "id": "P2YhbE9v_4ci"
      },
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Interleave the onnx/torch measurements per batch size. Batch sizes are read\n",
        "# from the recorded [batch, seconds] pairs, so the table follows whatever\n",
        "# batch list the benchmark loop actually ran.\n",
        "comparison_rows = []\n",
        "for onnx_img, torch_img, onnx_txt, torch_txt in zip(onnx_results[\"encode_image\"],\n",
        "                                                    clip_results[\"encode_image\"],\n",
        "                                                    onnx_results[\"encode_text\"],\n",
        "                                                    clip_results[\"encode_text\"]):\n",
        "    comparison_rows.append({\"backend\": \"onnx\", \"batch\": onnx_img[0],\n",
        "                            \"encode_image\": onnx_img[1], \"encode_text\": onnx_txt[1]})\n",
        "    comparison_rows.append({\"backend\": \"torch\", \"batch\": torch_img[0],\n",
        "                            \"encode_image\": torch_img[1], \"encode_text\": torch_txt[1]})\n",
        "pd.DataFrame(comparison_rows, columns=[\"backend\", \"batch\", \"encode_image\", \"encode_text\"])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 362
        },
        "id": "WfZfDk4PAlqm",
        "outputId": "aa180c38-35f8-403c-a172-4e78266510d5"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "\n",
              "  <div id=\"df-1af743e1-b1a7-4998-8692-878c02e1ea97\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>backend</th>\n",
              "      <th>batch</th>\n",
              "      <th>encode_image</th>\n",
              "      <th>encode_text</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>onnx</td>\n",
              "      <td>2</td>\n",
              "      <td>0.073</td>\n",
              "      <td>0.032</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>torch</td>\n",
              "      <td>2</td>\n",
              "      <td>0.041</td>\n",
              "      <td>0.033</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>onnx</td>\n",
              "      <td>8</td>\n",
              "      <td>0.088</td>\n",
              "      <td>0.052</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>torch</td>\n",
              "      <td>8</td>\n",
              "      <td>0.128</td>\n",
              "      <td>0.102</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>onnx</td>\n",
              "      <td>16</td>\n",
              "      <td>0.123</td>\n",
              "      <td>0.080</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>torch</td>\n",
              "      <td>16</td>\n",
              "      <td>0.258</td>\n",
              "      <td>0.201</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>onnx</td>\n",
              "      <td>32</td>\n",
              "      <td>0.196</td>\n",
              "      <td>0.138</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>torch</td>\n",
              "      <td>32</td>\n",
              "      <td>0.505</td>\n",
              "      <td>0.386</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>onnx</td>\n",
              "      <td>64</td>\n",
              "      <td>0.352</td>\n",
              "      <td>0.252</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>torch</td>\n",
              "      <td>64</td>\n",
              "      <td>0.995</td>\n",
              "      <td>0.754</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1af743e1-b1a7-4998-8692-878c02e1ea97')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-1af743e1-b1a7-4998-8692-878c02e1ea97 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-1af743e1-b1a7-4998-8692-878c02e1ea97');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ],
            "text/plain": [
              "  backend  batch  encode_image  encode_text\n",
              "0    onnx      2         0.073        0.032\n",
              "1   torch      2         0.041        0.033\n",
              "2    onnx      8         0.088        0.052\n",
              "3   torch      8         0.128        0.102\n",
              "4    onnx     16         0.123        0.080\n",
              "5   torch     16         0.258        0.201\n",
              "6    onnx     32         0.196        0.138\n",
              "7   torch     32         0.505        0.386\n",
              "8    onnx     64         0.352        0.252\n",
              "9   torch     64         0.995        0.754"
            ]
          },
          "metadata": {},
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# One row per benchmarked batch size; the batch sizes and the row count come\n",
        "# from the recorded [batch, seconds] pairs instead of a hard-coded list.\n",
        "onnx_df = pd.DataFrame({\"ONNX\": [\"ViT-B/32\"] * len(onnx_results[\"encode_image\"]),\n",
        "              \"batch\": [i[0] for i in onnx_results[\"encode_image\"]],\n",
        "              \"encode_image\": [i[1] for i in onnx_results[\"encode_image\"]],\n",
        "              \"encode_text\": [i[1] for i in onnx_results[\"encode_text\"]]})\n",
        "# total latency of encoding one image batch plus one text batch\n",
        "onnx_df[\"summary\"] = onnx_df[\"encode_image\"] + onnx_df[\"encode_text\"]"
      ],
      "metadata": {
        "id": "Xpw9lV7yBbA8"
      },
      "execution_count": 13,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "onnx_df"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "LItAyQkeDhnQ",
        "outputId": "ebd84ad1-f305-4578-9164-2884aaa2b245"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "\n",
              "  <div id=\"df-37ded07e-3c67-4ace-b569-8fa6f4dd5a44\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>ONNX</th>\n",
              "      <th>batch</th>\n",
              "      <th>encode_image</th>\n",
              "      <th>encode_text</th>\n",
              "      <th>summary</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>ViT-B/32</td>\n",
              "      <td>2</td>\n",
              "      <td>0.073</td>\n",
              "      <td>0.032</td>\n",
              "      <td>0.105</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>ViT-B/32</td>\n",
              "      <td>8</td>\n",
              "      <td>0.088</td>\n",
              "      <td>0.052</td>\n",
              "      <td>0.140</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>ViT-B/32</td>\n",
              "      <td>16</td>\n",
              "      <td>0.123</td>\n",
              "      <td>0.080</td>\n",
              "      <td>0.203</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>ViT-B/32</td>\n",
              "      <td>32</td>\n",
              "      <td>0.196</td>\n",
              "      <td>0.138</td>\n",
              "      <td>0.334</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>ViT-B/32</td>\n",
              "      <td>64</td>\n",
              "      <td>0.352</td>\n",
              "      <td>0.252</td>\n",
              "      <td>0.604</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-37ded07e-3c67-4ace-b569-8fa6f4dd5a44')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-37ded07e-3c67-4ace-b569-8fa6f4dd5a44 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-37ded07e-3c67-4ace-b569-8fa6f4dd5a44');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ],
            "text/plain": [
              "       ONNX  batch  encode_image  encode_text  summary\n",
              "0  ViT-B/32      2         0.073        0.032    0.105\n",
              "1  ViT-B/32      8         0.088        0.052    0.140\n",
              "2  ViT-B/32     16         0.123        0.080    0.203\n",
              "3  ViT-B/32     32         0.196        0.138    0.334\n",
              "4  ViT-B/32     64         0.352        0.252    0.604"
            ]
          },
          "metadata": {},
          "execution_count": 14
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Print the ONNX benchmark table as Markdown text, leaving out the row index.\n",
        "print(\n",
        "    onnx_df.to_markdown(index=False)\n",
        ")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "AIQDA9FaJZ7Y",
        "outputId": "4fdfd92a-5c8c-43d9-e875-7bcddc882113"
      },
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "| ONNX     |   batch |   encode_image |   encode_text |   summary |\n",
            "|:---------|--------:|---------------:|--------------:|----------:|\n",
            "| ViT-B/32 |       2 |          0.073 |         0.032 |     0.105 |\n",
            "| ViT-B/32 |       8 |          0.088 |         0.052 |     0.14  |\n",
            "| ViT-B/32 |      16 |          0.123 |         0.08  |     0.203 |\n",
            "| ViT-B/32 |      32 |          0.196 |         0.138 |     0.334 |\n",
            "| ViT-B/32 |      64 |          0.352 |         0.252 |     0.604 |\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Tabulate the `clip_results` measurements; element [1] of every entry is the value kept.\n",
        "clip_df = pd.DataFrame({\n",
        "    \"TORCH\": [\"ViT-B/32\"] * 5,\n",
        "    \"batch\": [2, 8, 16, 32, 64],\n",
        "    \"encode_image\": [entry[1] for entry in clip_results[\"encode_image\"]],\n",
        "    \"encode_text\": [entry[1] for entry in clip_results[\"encode_text\"]],\n",
        "}).assign(\n",
        "    # summary = row-wise sum of the two encoder columns\n",
        "    summary=lambda frame: frame[\"encode_image\"] + frame[\"encode_text\"],\n",
        ")"
      ],
      "metadata": {
        "id": "E1OXQUDvDZmI"
      },
      "execution_count": 16,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Print the TORCH benchmark table as Markdown text, leaving out the row index.\n",
        "print(\n",
        "    clip_df.to_markdown(index=False)\n",
        ")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xAj-ynhCDpPO",
        "outputId": "6a36903d-6bba-4675-8eb3-7f58af98e165"
      },
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "| TORCH    |   batch |   encode_image |   encode_text |   summary |\n",
            "|:---------|--------:|---------------:|--------------:|----------:|\n",
            "| ViT-B/32 |       2 |          0.041 |         0.033 |     0.074 |\n",
            "| ViT-B/32 |       8 |          0.128 |         0.102 |     0.23  |\n",
            "| ViT-B/32 |      16 |          0.258 |         0.201 |     0.459 |\n",
            "| ViT-B/32 |      32 |          0.505 |         0.386 |     0.891 |\n",
            "| ViT-B/32 |      64 |          0.995 |         0.754 |     1.749 |\n"
          ]
        }
      ]
    }
  ]
}